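*** Datasets required
* ccs1.dta = Comparative Candidates Survey Module 1 
* ccs2.dta = Comparative Candidates Survey Module 2 
* ccs3.dta = Comparative Candidates Survey Module 3 
* partyfacts-external-parties.csv = Party Facts external-parties list (only rows with dataset_key=="parlgov" are used)
* ccspartyfacts.csv = link table between CCS party_id and partyfacts_id
* parlgov.xlsx = Parl Gov Project
* cpds.dta = Comparative Political Data Set (CPDS)
* V-Dem-CY-Core-v13.dta = Varieties of Democracy (V-Dem)
*** Other requirements
* A subfolder "parlgov" must exist in the working directory (intermediate files are saved there).
* User-written commands used below: fre, eststo/esttab (estout package).
*** 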

////	********************************************************		////
////	**************** CCS MODULE 1			*******************		////
////	********************************************************		////
/*
*** individual variables - all CCS mod 1 ***
	- gender (e1)
	- year of birth (e2)
	- citizenship at birth (e3)
	- education (e6a)
	- incumbent (a4b1)
	- left-right position (c3)
	- elected (t8)
	- SWD (d1)
	- party stood for at election (a1) (for match # & party names, see quest_ccs1.pdf Appendix 2 page 110)
	
also need country (t1) and election year (t3)
	
*/

* Load CCS Module 1 and retain lower-house candidates only
* (observations with t9 equal to 2 or 3 are removed).
use "ccs1.dta", clear
drop if inlist(t9, 2, 3)

* Keep the survey items listed in the header above and give them
* analysis-friendly names (old name on top, new name underneath).
keep e1 e2 e3 e6a a4b1 c3 t8 d1 a1 t1 t3
rename (e1     e2        e3          e6a       a4b1      c3         t8      d1  a1        t1          t3) ///
       (gender yearbirth citizenship education incumbent lrposition elected swd partycode countrycode electionyear)

* Country name comes from the value label attached to the country code.
decode countrycode, gen(countryname)

* Build party_id, the key used to merge with the Party Facts identifiers
* and, through them, with ParlGov.
*   format: module-countrycode-partycode-electionyear
gen module = "1"
tostring countrycode partycode electionyear, replace force
gen party_id = module + "-" + countrycode + "-" + partycode + "-" + electionyear

* The string country code is no longer needed once countryname and
* party_id exist; the remaining components go back to numeric storage.
drop countrycode
destring module partycode electionyear, replace

save "ccs1_clean.dta", replace

*******************************************************************************

////	********************************************************		////
////	**************** CCS MODULE 2			*******************		////
////	********************************************************		////
/*
*** individual variables - all CCS mod 2 ***
	- gender (e1)
	- year of birth (e2)
	- citizenship at birth (e3)
	- education (e6a)
	- incumbent (a4b1)
	- left-right position (c3a)
	- elected (t11)
	- SWD (d1)
	- party stood for at election (a1) (for match # & party names, see appendix_ccs2.pdf "A_A1 Party coding scheme" page 81)

also need country (t1) and election year (t3)

*/


* Load CCS Module 2; variable names are lower-cased so the same names can be
* used as in Module 1.
use "ccs2.dta", clear
rename *, lower

* keep only candidates to lower house
* NOTE(review): Module 1 filters on t9 for both values (t9==2 | t9==3); here
* the first condition uses t7. Confirm against the Module 2 codebook.
drop if (t7 == 2) | (t9 == 3)

* Keep the survey items listed in the header above and harmonise their names
* with Module 1 (old name on top, new name underneath).
keep e1 e2 e3 e6a a4b1 c3a t11 d1 a1 t1 t3
rename (e1     e2        e3          e6a       a4b1      c3a        t11     d1  a1        t1          t3) ///
       (gender yearbirth citizenship education incumbent lrposition elected swd partycode countrycode electionyear)

fre gender citizenship education incumbent lrposition elected swd

* Country name from the value label; a trailing " <year>" suffix
* (2012-2017) is stripped from the label text.
decode countrycode, gen(countryname)
forvalues yr = 2012/2017 {
	replace countryname = subinstr(countryname, " `yr'", "", .)
}

* Shift the education scale down by one category, then fold the resulting
* 0 into category 1.
* NOTE(review): the shift is applied to every non-missing value, so any
* negative "missing" codes move by one as well (e.g. -1 becomes -2) —
* check that the later recodes of education still catch them.
replace education = education - 1
replace education = 1 if education == 0

* Build party_id, the key used to merge with the Party Facts identifiers
* and, through them, with ParlGov.
*   format: module-countrycode-partycode-electionyear
gen module = "2"
tostring countrycode partycode electionyear, replace force
gen party_id = module + "-" + countrycode + "-" + partycode + "-" + electionyear

* Drop the string country code and restore numeric storage for the rest.
drop countrycode
destring module partycode electionyear, replace

* Stack Module 1 underneath Module 2.
append using "ccs1_clean"

order countryname electionyear party_id partycode incumbent elected gender yearbirth citizenship education lrposition swd, first

save "ccs12.dta", replace

*******************************************************************************

////	********************************************************		////
////	**************** CCS MODULE 3			*******************		////
////	********************************************************		////
/*
*** individual variables - all CCS mod 3 ***
	- gender (e1)
	- year of birth (e2)
	- citizenship at birth (e3)
	- education (e5) [changed from last one] 
	- incumbent (a4b1)
	- left-right position (c5a) [changed from last one]
	- elected (t11)
	- SWD (d1)
	- party stood for at election (a1) (for match # & party names, see appendix_ccs2.pdf "A_A1 Party coding scheme" page 81)

also need country (t1) and election year (t3)

*/


* Load CCS Module 3, harmonise it with Modules 1-2 and append the pooled
* Module 1+2 file. The result is saved as ccs14.dta.
use "ccs3.dta", clear

rename *, lower

* keep only candidates to lower house
* NOTE(review): same filter as in the Module 2 section (t7 / t9); Module 1
* uses t9 for both values. Confirm against the Module 3 codebook.
drop if t7==2 | t9==3

keep e1 e2 e3 e5 a4b1 c5a t11 d1 a1 t1 t3

rename e1 gender
rename e2 yearbirth
rename e3 citizenship
rename e5 education
rename a4b1 incumbent
rename c5a lrposition
rename t11 elected
rename d1 swd
rename a1 partycode
rename t1 countrycode
rename t3 electionyear

fre gender citizenship education incumbent lrposition elected swd

* In this module countrycode is a string study identifier (country_year);
* replace it with the country name used in the other modules.
replace countrycode = "Canada" if countrycode == "CAN_2021"
replace countrycode = "Switzerland" if countrycode == "CHE_2019"
replace countrycode = "Ireland" if countrycode == "IRL_2020"
replace countrycode = "Portugal" if countrycode == "PRT_2019"
replace countrycode = "Romania" if countrycode == "ROU_2020"
replace countrycode = "Finland" if countrycode== "FIN_2019" 
replace countrycode = "UK" if countrycode== "GBR_2019" 
replace countrycode = "Germany" if countrycode== "GER_2021" 
replace countrycode = "Sweden" if countrycode== "SWE_2018" 

generate countryname = countrycode

* Shift the education scale down by one category, then fold the resulting
* 0 into category 1 (same treatment as Module 2).
replace education=education-1
recode education (0=1)

*gen party_id for merge with party facts identifier --> and then match with parlgov
	* format party_id: module-countrycode-partycode-electionyear

* FIX: these respondents belong to Module 3, so the module variable is 3
* (it was previously set to 2, mislabelling them as Module 2).
gen byte module = 3

* countrycode is already a string here; only the numeric parts need converting.
tostring partycode, replace force 
tostring electionyear, replace force

* The party_id prefix deliberately stays "2": the manual partyfacts_id
* assignments further down match on keys such as "2-Canada-1-2021", so
* changing the prefix would silently break those matches.
gen party_id="2" + "-" + countrycode + "-" + partycode + "-" + electionyear

drop countrycode
destring partycode, replace
destring electionyear, replace


* Stack the pooled Module 1+2 file underneath Module 3.
append using "ccs12.dta"

order countryname electionyear party_id partycode incumbent elected gender yearbirth citizenship education lrposition swd, first

* The intermediate Module 1+2 file is no longer needed.
erase "ccs12.dta"
save "ccs14.dta", replace
clear


********************************************************************************

////	********************************************************		////
////	********************************************************		////
/*** party variables ** 
	- party family (Parlgov/party/family_name + CMP for trying to fill missing from Parlgov)
	- party in government (Parlgov/cabinet/cabinet_party)
	- party discipline (possibly V-dem V-party: party cohesion? and other organizational indicators)
*/
////	********************************************************		////
////	********************************************************		////


/////////////////////////////////////////////////////////////////////
***** assign partyfacts_id to CCS for merge  with parlgov dataset	*****
/////////////////////////////////////////////////////////////////////

* --- Step 1: partyfacts_id -> ParlGov party id lookup (parlgov_id.dta) ---
import delimited "partyfacts-external-parties.csv", clear

* Only the rows that link Party Facts to ParlGov are needed.
keep if dataset_key=="parlgov"

* partyfacts_id must be unique for the m:1 merges below. Every duplicated id
* is removed (all copies), except id 1567 with ParlGov party 773, which is
* explicitly kept (per the original note: duplicates are mostly
* independents, except the UK Conservatives).
duplicates tag partyfacts_id, gen(dup)
replace dup = 0 if partyfacts_id==1567 & dataset_party_id=="773"
keep if dup == 0

keep country dataset_party_id name partyfacts_id
rename (country   name   dataset_party_id) ///
       (countryPG namePG parlgov_party_id)
destring parlgov_party_id, replace

save "parlgov_id.dta", replace

* --- Step 2: CCS party_id -> partyfacts_id -> ParlGov id (ccspartyfacts.dta) ---
import delimited "ccspartyfacts.csv", clear 

* Keep only CCS parties whose partyfacts_id is found in the ParlGov lookup.
merge m:1 partyfacts_id using "parlgov_id", keep(match) nogenerate

keep partyfacts_id parlgov_party_id countryPG namePG party_id
rename namePG partyname
destring parlgov_party_id, replace

save "ccspartyfacts.dta", replace 

********************************************************************************

/////////////////////////////////////////////////////////////////////////
*****	assign partyfacts_id & parlgov_id to CCS respondents data	*****
/////////////////////////////////////////////////////////////////////////


* Attach partyfacts_id / parlgov_party_id to every respondent in ccs14.dta.
*   1. merge on party_id with the CCS-PartyFacts link table;
*   2. fill in partyfacts_id by hand for party_id values the link table does
*      not cover;
*   3. drop respondents that still have no partyfacts_id;
*   4. merge again on partyfacts_id (update) to pick up the ParlGov id and
*      party name for the hand-assigned parties.
use "ccs14.dta", clear

merge m:1 party_id using "ccspartyfacts"

* some observations not matched - assign partyfacts_id manually 
	// many very small parties not in partyfacts, I dropped them
	
*National party Australia 2010
replace partyfacts_id=1743 if party_id=="1-1-3-2010"

*Germany 2017
replace partyfacts_id=1375 if party_id=="2-22-2-2017"
replace partyfacts_id=1731 if party_id=="2-22-3-2017"
replace partyfacts_id=383 if party_id=="2-22-4-2017"
replace partyfacts_id=1976 if party_id=="2-22-5-2017"
replace partyfacts_id=573 if party_id=="2-22-6-2017"
replace partyfacts_id=1545 if party_id=="2-22-7-2017"
replace partyfacts_id=1816 if party_id=="2-22-8-2017"

*Iceland 2016: 1-9 
* (commented-out lines are party codes left without a partyfacts_id)
replace partyfacts_id=1396 if party_id=="2-19-1-2016"
replace partyfacts_id=964 if party_id=="2-19-2-2016"
replace partyfacts_id=363 if party_id=="2-19-3-2016"
replace partyfacts_id=457 if party_id=="2-19-4-2016"
//replace partyfacts_id= if party_id=="2-19-5-2016"
//replace partyfacts_id= if party_id=="2-19-6-2016"
replace partyfacts_id=1224 if party_id=="2-19-7-2016"
//replace partyfacts_id= if party_id=="2-19-8-2016"
//replace partyfacts_id= if party_id=="2-19-9-2016"

*Iceland 2017: 1-9
replace partyfacts_id=1396 if party_id=="2-20-1-2017"
replace partyfacts_id=964 if party_id=="2-20-2-2017"
replace partyfacts_id=363 if party_id=="2-20-3-2017"
replace partyfacts_id=457 if party_id=="2-20-4-2017"
//replace partyfacts_id= if party_id=="2-20-5-2017"
//replace partyfacts_id= if party_id=="2-20-6-2017"
replace partyfacts_id=1224 if party_id=="2-20-7-2017"
//replace partyfacts_id= if party_id=="2-20-8-2017"
//replace partyfacts_id= if party_id=="2-20-9-2017"

*Czech republic 2017: 3-9
replace partyfacts_id=676 if party_id=="2-21-3-2017"
replace partyfacts_id=1728 if party_id=="2-21-4-2017"
replace partyfacts_id=466 if party_id=="2-21-5-2017"
//replace partyfacts_id= if party_id=="2-21-6-2017"
//replace partyfacts_id= if party_id=="2-21-7-2017"
//replace partyfacts_id= if party_id=="2-21-8-2017"
//replace partyfacts_id= if party_id=="2-21-9-2017"

* The keys below use a country NAME (not a numeric code) with the prefix "2":
* that is how party_id is built in the CCS Module 3 section of this file.
*Canada 2021:
replace partyfacts_id=1739 if party_id=="2-Canada-1-2021" 
replace partyfacts_id=1004 if party_id=="2-Canada-2-2021" 
replace partyfacts_id=152 if party_id=="2-Canada-3-2021" 
replace partyfacts_id=1428 if party_id=="2-Canada-4-2021" 
replace partyfacts_id=1259 if party_id=="2-Canada-5-2021" 

*Switzerland 2019
replace partyfacts_id=360 if party_id=="2-Switzerland-1-2019" 
replace partyfacts_id=1808 if party_id=="2-Switzerland-2-2019" 
replace partyfacts_id=1819 if party_id=="2-Switzerland-3-2019" 
replace partyfacts_id=308 if party_id=="2-Switzerland-4-2019" 
replace partyfacts_id=1006 if party_id=="2-Switzerland-5-2019" 
replace partyfacts_id=1759 if party_id=="2-Switzerland-6-2019" 
replace partyfacts_id=1415 if party_id=="2-Switzerland-7-2019" 
replace partyfacts_id=1661 if party_id=="2-Switzerland-8-2019" 
replace partyfacts_id=1122 if party_id=="2-Switzerland-10-2019"
replace partyfacts_id=1123 if party_id=="2-Switzerland-11-2019"
replace partyfacts_id=631 if party_id=="2-Switzerland-12-2019"
replace partyfacts_id=669 if party_id=="2-Switzerland-13-2019"

*Partyfacts_id for these Swiss parties do not yet exist in the party facts database or are small parties/independents 
*(Geneva Citizens' Movement, Alternative List / Sol – Solidarity / EAG – Together on the Left, Other Party, No Party) 
* Setting the id to missing means these respondents are dropped further down
* unless they were matched by the merge above.
replace partyfacts_id=. if party_id=="2-Switzerland-9-2019"
replace partyfacts_id=. if party_id=="2-Switzerland-14-2019"
replace partyfacts_id=. if party_id=="2-Switzerland-20-2019"
replace partyfacts_id=. if party_id=="2-Switzerland-97-2019"   

*Ireland 2020
replace partyfacts_id=1288 if party_id=="2-Ireland-1-2020"
replace partyfacts_id=1055 if party_id=="2-Ireland-2-2020"
replace partyfacts_id=562 if party_id=="2-Ireland-3-2020"
replace partyfacts_id=1775 if party_id=="2-Ireland-4-2020"
replace partyfacts_id=4 if party_id=="2-Ireland-5-2020"
replace partyfacts_id=4870 if party_id=="2-Ireland-6-2020"
replace partyfacts_id=242 if party_id=="2-Ireland-7-2020"

*Partyfacts_id for these Irish parties do not yet exist in the party facts database or are small parties/independents 
*(No party/independent,Renua Ireland, Aontú, Irish Freedom Party) 
replace partyfacts_id=. if party_id=="2-Ireland-8-2020"
replace partyfacts_id=. if party_id=="2-Ireland-9-2020"
replace partyfacts_id=. if party_id=="2-Ireland-10-2020"
replace partyfacts_id=. if party_id=="2-Ireland-11-2020"

*Portugal 2019
replace partyfacts_id=1310 if party_id=="2-Portugal-1-2019"
replace partyfacts_id=1308 if party_id=="2-Portugal-2-2019"  
replace partyfacts_id=97 if party_id=="2-Portugal-3-2019"
replace partyfacts_id=886 if party_id=="2-Portugal-4-2019"
replace partyfacts_id=310 if party_id=="2-Portugal-5-2019"
replace partyfacts_id=1359 if party_id=="2-Portugal-6-2019"
replace partyfacts_id=655 if party_id=="2-Portugal-7-2019"
replace partyfacts_id=896 if party_id=="2-Portugal-11-2019"

*Partyfacts_id for these Portuguese parties do not yet exist in the party facts database or are small parties/independents 
*(Iniciativa Liberal, LCI, PSR, MPT, MEP, MES)  
replace partyfacts_id=. if party_id=="2-Portugal-9-2019"
replace partyfacts_id=. if party_id=="2-Portugal-12-2019"
replace partyfacts_id=. if party_id=="2-Portugal-13-2019"
replace partyfacts_id=. if party_id=="2-Portugal-14-2019"
replace partyfacts_id=. if party_id=="2-Portugal-15-2019"
replace partyfacts_id=. if party_id=="2-Portugal-16-2019"

*Partyfacts_id for UK 2019 
replace partyfacts_id=1567 if party_id=="2-UK-1-2019" 
replace partyfacts_id=1794 if party_id=="2-UK-2-2019" 
replace partyfacts_id=1516 if party_id=="2-UK-3-2019" 
replace partyfacts_id=1388 if party_id=="2-UK-4-2019" 
replace partyfacts_id=1002 if party_id=="2-UK-5-2019" 
replace partyfacts_id=986 if party_id=="2-UK-6-2019" 
*7 = The Brexit Party (not included in party facts dataset) 

*Partyfacts_id for Sweden 2018
replace partyfacts_id=830 if party_id=="2-Sweden-1-2018"
replace partyfacts_id=379 if party_id=="2-Sweden-2-2018"
replace partyfacts_id=199 if party_id=="2-Sweden-3-2018"
replace partyfacts_id=1274 if party_id=="2-Sweden-4-2018"
replace partyfacts_id=651 if party_id=="2-Sweden-6-2018"
replace partyfacts_id=456 if party_id=="2-Sweden-7-2018"
replace partyfacts_id=409 if party_id=="2-Sweden-8-2018"
replace partyfacts_id=1240 if party_id=="2-Sweden-9-2018"


*GER_2021 (parties not collected) 


**Partyfacts_id for Finland 2019
replace partyfacts_id=1303 if party_id=="2-Finland-1-2019"
replace partyfacts_id=495 if party_id=="2-Finland-3-2019"
replace partyfacts_id=901 if party_id=="2-Finland-4-2019"
replace partyfacts_id=479 if party_id=="2-Finland-5-2019"
replace partyfacts_id=1044 if party_id=="2-Finland-6-2019"
replace partyfacts_id=1229 if party_id=="2-Finland-7-2019"
replace partyfacts_id=703 if party_id=="2-Finland-8-2019"
replace partyfacts_id=249 if party_id=="2-Finland-13-2019"
replace partyfacts_id=7030 if party_id=="2-Finland-16-2019"


* countryPG is dropped here and comes back with the second merge below.
drop countryPG
* _merge==2: link-table rows with no respondent in the CCS data.
drop if _merge==2
* _merge==1 with no id: respondents whose party was neither matched by the
* link table nor assigned by hand above.
drop if _merge==1 & partyfacts_id==.
drop _merge

* Second pass on partyfacts_id: with the update option, variables shared with
* the master (e.g. parlgov_party_id) are filled in only where the master value
* is missing, which covers the hand-assigned parties; namePG is added as a
* new variable.
merge m:1 partyfacts_id using "parlgov_id", update
drop if _merge==2
drop _merge

* Keep the name from the first merge as oldpartyname and use the ParlGov
* name from the second merge as partyname.
rename partyname oldpartyname
rename namePG partyname

order countryname electionyear party_id partycode partyname partyfacts_id parlgov_party_id module, first
save "ccs14.dta", replace
clear


********************************************************************************

/////////////////////////////////////////////////////////////////////////
*****	merge with party family & party in government	*****
/////////////////////////////////////////////////////////////////////////

* party family (Parlgov/party/family_name)
* --- ParlGov "party" sheet: party family per ParlGov party id ---
import excel "parlgov.xlsx", sheet("party") firstrow clear

keep family_name party_id
rename party_id parlgov_party_id
save "parlgov/partyfamily.dta", replace

********************************************************************************

* --- ParlGov "cabinet" sheet: government status per party and election year ---
import excel "parlgov.xlsx", sheet("cabinet") firstrow clear

* Election year = first four characters of election_date; elections before
* 2005 are not needed.
gen electionyear = substr(election_date, 1, 4)
destring electionyear, replace
drop if electionyear < 2005

* Year and month in which the cabinet started (characters 1-4 and 6-7 of
* start_date).
gen startdate  = substr(start_date, 1, 4)
gen startmonth = substr(start_date, 6, 2)
destring startdate startmonth, replace

* Keep only cabinets that started in the election year itself ...
keep if startdate == electionyear

* ... and, within each country-election, only those with the earliest
* start month.
bysort country_name electionyear: egen minmonth = min(startmonth)
keep if startmonth == minmonth

* One row per party and election year, as required by the m:1 merge on
* (parlgov_party_id, electionyear) later on.
bysort electionyear party_id: keep if _n == 1

keep cabinet_party prime_minister seats election_seats_total party_id electionyear
rename party_id parlgov_party_id
save "parlgov/cabinet.dta", replace


* Add party family and government status to the respondent-level file.
use "ccs14.dta", clear
sort countryname electionyear partyname

* Party family: keep every respondent, discard ParlGov parties that have no
* respondents.
merge m:1 parlgov_party_id using "parlgov/partyfamily", keep(master match) nogenerate

* Cabinet membership / prime-minister party for the respondent's election year.
merge m:1 parlgov_party_id electionyear using "parlgov/cabinet", keep(master match) nogenerate

drop countryPG 

*** problem CDU-CSU germany not sure why, recode manually
* For the German elections of 2005, 2009 and 2013, ParlGov party 808 is set
* to cabinet party and ParlGov party 1180 to prime-minister party.
* NOTE(review): the two flags are set on different party ids — confirm that
* this split is intended.
// still seat & election_seats_total to complete
foreach yr of numlist 2005 2009 2013 {
	replace cabinet_party  = 1 if parlgov_party_id == 808  & electionyear == `yr'
	replace prime_minister = 1 if parlgov_party_id == 1180 & electionyear == `yr'
}

* Anything still missing is treated as "not in cabinet" / "not the
* prime-minister party".
replace cabinet_party  = 0 if cabinet_party == .
replace prime_minister = 0 if prime_minister == .

save "ccs14.dta", replace
clear

*******************************************************************************

*** CPDS ***

* Prepare the CPDS country-year controls and merge them onto the
* respondent-level file by country name and election year.
use "cpds.dta", clear

* Harmonise the country label and key names with the CCS file.
replace country="UK" if country=="United Kingdom"
rename country countryname
rename year electionyear

keep countryname electionyear prop gov_type effpar_ele effpar_leg dis_gall fed pres outlays realgdpgr inflation debt postfisc_gini

save "cpds_cpds_coded.dta", replace

use "ccs14.dta", clear

* FIX: merge from the file that was just saved. The previous path
* ("cpds/cpds_coded") did not match the save above, so the merge either
* failed or silently used a stale file from another folder.
merge m:1 countryname electionyear using "cpds_cpds_coded.dta"
* Keep all respondents; discard CPDS country-years without respondents.
drop if _merge==2
drop _merge

save "ccs14.dta", replace

*******************************************************************************

*** V-DEM (electoral integrity index, v2x_polyarchy, EMB autonomy, and EMB capacity) *** 

* Prepare the V-Dem country-year indicators used in the analysis.
use "V-Dem-CY-Core-v13.dta", clear

keep country_name year v2x_polyarchy v2x_elecoff v2xel_frefair v2x_frassoc_thick v2x_freexp_altinf v2x_suffr v2elembaut v2elembaut_mean v2elembcap v2elembcap_mean

* Merge keys named as in the CCS file.
rename (country_name year) (countryname electionyear)

* Analysis names (old name on top, new name underneath).
* NOTE(review): "integrity" is v2x_polyarchy — confirm this is the intended
* electoral-integrity measure.
rename (v2x_polyarchy v2elembaut  v2elembaut_mean v2elembcap  v2elembcap_mean) ///
       (integrity     EMBautonomy MeanEMBautonomy EMBcapacity MeanEMBcapacity)

* Rescale integrity by a factor of 100.
replace integrity = 100 * integrity

* Country labels as used in the CCS file.
replace countryname = "UK"             if countryname == "United Kingdom"
replace countryname = "Czech Republic" if countryname == "Czechia"

save "VDEM_country_vdem_coded.dta", replace

* Merge the V-Dem indicators onto the respondent file, build the reversed
* SWD scale, and turn survey missing codes into Stata missing values.
use "ccs14.dta", clear

merge m:1 countryname electionyear using "VDEM_country_vdem_coded.dta"
* Keep all respondents; discard V-Dem country-years without respondents.
drop if _merge==2
drop _merge

* swdnew: SWD reversed so that higher = more satisfied; -1 and -9 are
* missing codes.
clonevar swdnew = swd
recode swdnew (1=4) (2=3) (3=2) (4=1) (-1=.) (-9=.)  
label define swdnew 1 "Not at all satisfied" 2 "Not very satisfied" 3 "fairly satisfied" 4 "very satisfied"
label values swdnew swdnew 

* FIX: recode the -9 missing code BEFORE averaging. Previously the mean was
* taken first, so every -9 was averaged into MeanLRPosition as if it were a
* real left-right placement.
recode lrposition (-9=.) 
egen MeanLRPosition = mean(lrposition), by(countryname electionyear)  

recode yearbirth (1=.) (2=.) (3=.) (4=.) (5=.) (6=.) (2022=.)  
*Observations for yearbirth in some countries were coded as 1, 2, 3, 4 (rather than the year of birth) so observations were replaced with "."

generate age = electionyear - yearbirth
*Formula for age of politician at the time they filled out the survey

recode age (2022=.) (2028=.) (2029=.)  
*Some newer surveys did not ask for yearbirth so age is not calculated* 

* FIX: the three values above are what "electionyear minus a negative
* missing code" happens to produce for particular election years. Any other
* combination (a different election year or code) slipped through, so every
* implausible age is set to missing as well.
replace age = . if age < 0 | age > 120

recode education (99=.) 
*Replacing observations for education that are unknown (99) with "." 

recode elected (-1=.) 
*Replacing observations when the Q was not asked (-1) with "." 

recode incumbent (-1=.) (-2=.) (-9=.) 

recode gender (-9=.)

recode citizenship (-9=.) (-1=.) 

* NOTE(review): in Modules 2 and 3 education was shifted by -1 before this
* point, so an original -1 there is now -2 and is not caught here — confirm
* against the codebooks.
recode education (-10=.) (-1=.)

drop if countryname=="Malta"
drop if countryname=="Netherlands" 


save "ccs14.dta", replace

*******************************************************************************

** Checking variables / observations ** 

* Descriptive checks. For each variable: overall summary, frequency table,
* and (where applicable) its mean by country and by SWD category.

foreach v in countryname electionyear {
	summarize `v'
	tab `v'
}

summarize swdnew
tabulate swdnew
tabulate countryname, summarize(swdnew)

* incumbent     : # of observations is too low - removing as control variable
* elected       : will use cabinet_party for winner vs. loser to keep observations higher
* cabinet_party : use this for interaction / control winner vs. loser
foreach v in incumbent elected cabinet_party {
	summarize `v'
	tab `v'
	tabulate countryname, summarize(`v')
	tabulate swdnew, summarize(`v')
}

* Share of respondents from cabinet parties per country-election.
egen Cabinet_mean = mean(cabinet_party), by(countryname electionyear)
tabulate countryname, summarize(Cabinet_mean)

* lrposition : use this for interaction / control
*              (this variable causes several observations to be dropped)
* The remaining variables are the V-Dem based measures.
foreach v in lrposition integrity EMBcapacity MeanEMBcapacity EMBautonomy MeanEMBautonomy {
	summarize `v'
	tab `v'
	tabulate countryname, summarize(`v')
	tabulate swdnew, summarize(`v')
}

*summarize pei
*tab pei 
*tabulate countryname, summarize (pei)
*tabulate swdnew, summarize (pei)
*Observations too low - can use this as additional check for trends 

summarize partyname
tab partyname

* Gender: descriptive checks on the original coding, then recode to a
* 0/1 dummy (1 -> 0 "male", 2 -> 1 "female").
summarize gender
tab gender
tabulate countryname, summarize (gender)
tabulate swdnew, summarize (gender)

* A single recode applies the first matching rule per observation, so this
* is equivalent to recoding 1->0 and then 2->1.
recode gender (1=0) (2=1)

* Detach the old value label, define the new one, and attach it.
* FIX: the label was defined but never attached again after being detached,
* so the recoded dummy ended up unlabelled. "replace" is used instead of
* "modify" so that no stale entry for the old code 2 survives if a label
* with this name already existed.
label values gender 
label define gender 0 "male" 1 "female", replace
label values gender gender
tab gender 

* Individual-level controls that are inspected but NOT used for the weights:
*   age         : substantially lowers the number of observations, elections, and countries
*   citizenship : lowers the number of observations / there are very few non-country at birth
*   education   : lowers the number of observations / primarily all have post-secondary or more
foreach v in age citizenship education {
	summarize `v'
	tab `v'
	tabulate countryname, summarize(`v')
	tabulate swdnew, summarize(`v')
}

* --- fed ---
summarize fed
tab fed
tabulate countryname, summarize(fed)
tabulate swdnew, summarize(fed)
*Coding decision - create a dummy variable for fed: Federalism or no federalism
replace fed = 1 if fed == 2
label list fed 
label define fed 0 "No federalism" 1 "Federalism", modify 
summarize fed
tab fed
tabulate countryname, summarize(fed)

** Missing data for Canada2021: 
replace fed = 1 if countryname=="Canada" & electionyear==2021

* --- pres ---
summarize pres
tab pres
tabulate countryname, summarize(pres)
tabulate swdnew, summarize(pres)
*Coding decision - create a dummy variable for pres: Parliamentary or Mixed (no observations have a presidential system) 
label list pres
recode pres (2 3 4 = 1)
label define pres 1 "Mixed", modify 
tab pres
tabulate countryname, summarize(pres)

** Missing data for Canada2021: 
replace pres = 0 if countryname=="Canada" & electionyear==2021

* Remaining country-level controls: descriptive checks, manual values for
* Canada 2021 (missing in the merged CPDS data), and save as ccs16.dta.
summarize effpar_ele
tab effpar_ele 
tabulate countryname, summarize (effpar_ele)
tabulate swdnew, summarize (effpar_ele)

*summarize dis_gall
*tab dis_gall
*tabulate countryname, summarize (dis_gall)
*tabulate swdnew, summarize (dis_gall)

** Missing data for Canada2021
replace dis_gall=3.70 if countryname=="Canada" & electionyear==2021
replace effpar_ele=3.84 if countryname=="Canada" & electionyear==2021 

* FIX: use the full variable name realgdpgr. The abbreviation "realgdp" only
* works while variable abbreviation is on and no other variable starts with
* the same letters; the regressions below already use the full name.
summarize realgdpgr
tab realgdpgr
tabulate countryname, summarize (realgdpgr)
tabulate swdnew, summarize (realgdpgr)

** Missing data for Canada2021
replace realgdpgr=5.3 if countryname=="Canada" & electionyear==2021


save "ccs16.dta", replace

*******************************************************************************

*** WEIGHTS *** 
*** Party weights within countries (i.e. adjusting for unequal # of respondents across parties - within country)
 
* Weights.
*   party_weight   : equalises the contribution of parties within each
*                    country-election (election_group)
*   country_weight : equalises the contribution of countries
*   allweight      : product of the two
* FIX: the number of election groups (49) and the number of countries (21)
* were typed in by hand (and the accompanying comment said 43). Both are now
* derived from the data, so the weights stay correct when the sample changes.

* Analysis sample: respondents with both swdnew and cabinet_party observed.
* After this step "missing" is 0 for every row; it is used below only as a
* never-missing variable to count rows with.
egen missing=rowmiss(swdnew cabinet_party) 
drop if missing!=0 

egen election_group=group(countryname electionyear)
tabulate election_group 
tabulate countryname

* Party index running 1..k within each election group.
* group(election_group partyname) numbers the combinations in sorted order,
* so ids are consecutive within an election group; subtracting the group's
* smallest id and adding 1 gives the same 1..k index that a separate
* group(partyname) per election group would give. Respondents with an empty
* partyname get a missing index, as before.
tempvar party_in_election first_in_election
egen `party_in_election' = group(election_group partyname)
bysort election_group: egen `first_in_election' = min(`party_in_election')
gen groupparty = `party_in_election' - `first_in_election' + 1
drop `party_in_election' `first_in_election'
 
* number of party by election study
bysort election_group: egen number_party=max(groupparty)
 
* number of total respondent by election study
bysort election_group: egen numbertotalrespondent=count(missing) 
 
* number of respondent by party by election study
* NOTE(review): respondents with a missing party index are counted together
* as one group here but are not included in number_party.
bysort election_group groupparty: egen numberpartyrespondent=count(missing) 
 
* representative proportion of respondent by party (= 100 / number of parties)
gen rep_prop= numbertotalrespondent/number_party/numbertotalrespondent*100
 
* sample proportion of respondent by party
gen samp_prop= numberpartyrespondent/numbertotalrespondent*100
 
gen party_weight=rep_prop/samp_prop
 
*** Country weights (i.e. adjusting for unequal # of respondents across countries)
 
egen numbertotalrespondentT=count(missing)
 
* number of total respondent by country
bysort countryname: egen numbertotalrespondentC=count(missing)

* number of countries in the analysis sample: tag one row per country and
* sum the tags
egen byte country_tag = tag(countryname)
egen number_country = total(country_tag)
 
* representative proportion of respondent by country (= 100 / number of countries)
gen rep_propC= numbertotalrespondentT/number_country/numbertotalrespondentT*100
 
* sample proportion of respondent by country
gen samp_propC= numbertotalrespondentC/numbertotalrespondentT*100
 
gen country_weight=rep_propC/samp_propC
 
*** party-country weights – this is the variable you want to use
gen allweight=party_weight*country_weight

save "ccs16.dta", replace

*********************************************************

** Creating country-level aggregate for mean SWD, mean SWD (winners), mean SWD (losers), mean Cabinet, mean Left-Right 

* Party-weighted means per country-election:
*   weighted mean = sum(weight * x) / sum(weight), both sums taken over the
*   respondents that actually enter the numerator.

* --- SWD, all respondents ---
gen weighted_SWD = swdnew * party_weight
egen total_weighted_SWD = total(weighted_SWD), by(countryname electionyear)
egen total_weights = total(party_weight), by(countryname electionyear)
gen weighted_mean_SWD = total_weighted_SWD / total_weights

* --- SWD, winners (cabinet parties) ---
gen weighted_Winner = swdnew * party_weight
gen winner = cabinet_party==1
egen total_weighted_Winner = total(weighted_Winner * winner), by(countryname electionyear)
egen total_weights_Winner = total(party_weight * winner), by(countryname electionyear)
gen Winner_weighted_mean = total_weighted_Winner / total_weights_Winner

* --- SWD, losers (non-cabinet parties) ---
gen weighted_Loser = swdnew * party_weight
gen loser = cabinet_party==0
egen total_weighted_Loser = total(weighted_Loser * loser), by(countryname electionyear)
egen total_weights_Loser = total(party_weight * loser), by(countryname electionyear)
gen Loser_weighted_mean = total_weighted_Loser / total_weights_Loser

**Random Verification (to check if it matches manual inputs): 
reg swdnew [pw=party_weight] if countryname=="Australia" & electionyear==2007
reg swdnew [pw=party_weight] if countryname=="Australia" & electionyear==2007 & cabinet_party==1 
reg swdnew [pw=party_weight] if countryname=="Australia" & electionyear==2007 & cabinet_party==0

* --- share of respondents from cabinet parties ---
gen weighted_Cabinet = cabinet_party * party_weight
egen total_weighted_Cabinet = total(weighted_Cabinet), by(countryname electionyear)
gen weighted_mean_Cabinet = total_weighted_Cabinet / total_weights

summarize weighted_mean_Cabinet
tab weighted_mean_Cabinet
tabulate countryname, summarize (weighted_mean_Cabinet)
tabulate swdnew, summarize (weighted_mean_Cabinet)

* --- left-right position ---
* FIX: lrposition has missing values, and total() skips them in the
* numerator. The denominator used to be the weights of ALL respondents
* (total_weights), which pulled the mean toward zero. It is now the sum of
* weights over respondents with an observed lrposition only.
gen weighted_lrposition = lrposition * party_weight
egen total_weighted_lrposition = total(weighted_lrposition), by(countryname electionyear)
egen total_weights_lrposition = total(party_weight * !missing(lrposition)), by(countryname electionyear)
gen weighted_mean_lrposition = total_weighted_lrposition / total_weights_lrposition

summarize weighted_mean_lrposition
tab weighted_mean_lrposition
tabulate countryname, summarize (weighted_mean_lrposition)
tabulate swdnew, summarize (weighted_mean_lrposition)

order countryname electionyear weighted_mean_SWD Winner_weighted_mean Loser_weighted_mean weighted_mean_Cabinet weighted_mean_lrposition integrity EMBcapacity MeanEMBcapacity EMBautonomy MeanEMBautonomy swdnew, first

save "ccs16.dta", replace

*********************************************************

** Analysis - Country-level Regression ** 

** Aggregate by country / election year regression 

* Collapse to one row per country-election (the aggregates are constant
* within each country-election, so any row carries them).
bysort countryname electionyear: gen order = _n
keep if order == 1

* Frequency tables of the country-level variables.
foreach v in countryname electionyear weighted_mean_SWD integrity MeanEMBautonomy MeanEMBcapacity fed pres dis_gall realgdpgr {
	tab `v'
}

* Right-hand side shared by the baseline models.
local rhs integrity MeanEMBautonomy MeanEMBcapacity i.fed i.pres effpar_ele realgdpgr

* All respondents.
reg weighted_mean_SWD `rhs', vce(robust)

* Winners and losers (the pair is run twice, as in the original script).
forvalues pass = 1/2 {
	foreach dv in Winner_weighted_mean Loser_weighted_mean {
		reg `dv' `rhs', vce(robust)
	}
}

* EMB capacity interacted with mean left-right position.
reg weighted_mean_SWD integrity MeanEMBautonomy c.MeanEMBcapacity##c.weighted_mean_lrposition i.fed i.pres effpar_ele realgdpgr, vce(robust)

save "Analysis2.dta", replace 

clear all

*********************************************************

** Results ** 
use "Analysis2.dta"

****************************************************** 

*List of Hypotheses: 
*H1a: Politicians' SWD will be higher in countries with more electoral integrity
*H1b: The effect of electoral integrity is greater for winners than losers. 
*H1c: The winner-loser gap in democratic satisfaction increases as electoral integrity increases. 
*H2: Politicians' SWD will be higher in countries with EMBs with more independence.
*H3a: Politicians' SWD will be higher in countries with EMBs that have greater resources.
*H3b: The effect of EMBs having greater resources is greater for politicians on the ideological left than politicians on the ideological right. 

****************************************************** 

* Regression for H1a, H2, and H3a: 
* (EI, EMB Autonomy, and EMB Capacity)

reg weighted_mean_SWD integrity, robust
eststo m101 
reg weighted_mean_SWD MeanEMBautonomy, robust
eststo m102 
reg weighted_mean_SWD MeanEMBcapacity, robust
eststo m103 
reg weighted_mean_SWD integrity MeanEMBautonomy MeanEMBcapacity i.fed i.pres effpar_ele realgdpgr, robust
eststo m1 
estat vif

* Regression table for H1a / H2 / H3a: the three bivariate models
* (m101-m103) next to the full model with controls (m1).
*   b(2) se(2)  coefficients and standard errors with two decimals
*   ar2         adjusted R-squared in the table footer
*   staraux     significance stars attached to the standard errors
*   star()      * p<0.10, ** p<0.05, *** p<0.01
* realgdpgr is labelled "Real GDP Growth" rather than "GDP": the regressor is
* a growth rate, so the shorter label misdescribed the coefficient.
* NOTE(review): 1.pres is labelled "Mixed System" - confirm against how pres
* is coded upstream. The 0.fed / 0.pres labels are presumably never printed
* because nobaselevels suppresses base categories.
esttab m101 m102 m103 m1, b(2) se(2) ar2 ///
    nobaselevels ///
    coeflabels(integrity       "Electoral Integrity"         ///
               MeanEMBautonomy "EMB Independence"            ///
               MeanEMBcapacity "EMB Capacity"                ///
               0.fed           "No Federalism"               ///
               1.fed           "Federalism"                  ///
               0.pres          "Parliamentary System"        ///
               1.pres          "Mixed System"                ///
               effpar_ele      "Effective Number of Parties" ///
               realgdpgr       "Real GDP Growth"             ///
               _cons           "Constant")                   ///
    star(* 0.10 ** 0.05 *** 0.01) staraux ///
    nodep mtitles("Integrity" "Independence" "Capacity" "Controls") ///
    onecell wide varwidth(32) modelwidth(18)


* Figure for H1a / H2 / H3a: predicted SWD across electoral integrity.
* The full model is re-fitted quietly so that margins uses these estimates,
* then predictions are evaluated at integrity = 60, 70, ..., 100.
quietly regress weighted_mean_SWD integrity MeanEMBautonomy MeanEMBcapacity ///
    i.fed i.pres effpar_ele realgdpgr, vce(robust)
quietly margins, at(integrity=(60(10)100))

* Margins plot with a histogram of integrity (percent) on a second y axis.
marginsplot, ///
    addplot(histogram integrity, percent yaxis(2) yscale(alt) fcolor(%25) ///
            ytitle(Percentage, axis(2))) ///
    title(Electoral Integrity on SWD , size(medsmall)) ///
    legend(order(1 "EI")) ///
    xtitle(Electoral Integrity) ytitle(SWD)
graph export "updatedgraph.jpg", replace

****************************************************** 

* Models for H1b: electoral integrity among winners and among losers.
* Same right-hand side in both; only the outcome differs. No interaction
* terms. eststo is used as a prefix to run and store each model.

* Winners
eststo m2: regress Winner_weighted_mean integrity MeanEMBautonomy MeanEMBcapacity ///
    i.fed i.pres effpar_ele realgdpgr, vce(robust)

* Losers
eststo m3: regress Loser_weighted_mean integrity MeanEMBautonomy MeanEMBcapacity ///
    i.fed i.pres effpar_ele realgdpgr, vce(robust)

* Regression table for H1b: winner model (m2) next to loser model (m3).
* Coefficients and standard errors with two decimals, adjusted R-squared,
* stars on the standard errors (* p<0.10, ** p<0.05, *** p<0.01).
* realgdpgr is labelled "Real GDP Growth" rather than "GDP": the regressor is
* a growth rate, so the shorter label misdescribed the coefficient.
esttab m2 m3, b(2) se(2) ar2 ///
    nobaselevels ///
    coeflabels(integrity       "Electoral Integrity"         ///
               MeanEMBautonomy "EMB Independence"            ///
               MeanEMBcapacity "EMB Capacity"                ///
               0.fed           "No Federalism"               ///
               1.fed           "Federalism"                  ///
               0.pres          "Parliamentary System"        ///
               1.pres          "Mixed System"                ///
               effpar_ele      "Effective Number of Parties" ///
               realgdpgr       "Real GDP Growth"             ///
               _cons           "Constant")                   ///
    star(* 0.10 ** 0.05 *** 0.01) staraux ///
    nodep mtitles("Winner" "Loser") ///
    onecell wide varwidth(32) modelwidth(18)

* Original note: "WITH 49 FOR LOSERS".
* NOTE(review): presumably the loser model here uses 49 cases because no
* observations have been dropped yet - confirm.

* Figure for H1b (winners): predicted winner SWD at integrity = 60, 70, ..., 100,
* with a histogram of integrity (percent) on a second y axis.
quietly regress Winner_weighted_mean integrity MeanEMBautonomy MeanEMBcapacity ///
    i.fed i.pres effpar_ele realgdpgr, vce(robust)
quietly margins, at(integrity=(60(10)100))
marginsplot, ///
    addplot(histogram integrity, percent yaxis(2) yscale(alt) fcolor(%25) ///
            ytitle(Percentage, axis(2))) ///
    title(Electoral Integrity on SWD (Winning) , size(medsmall)) ///
    legend(order(1 "EI")) ///
    xtitle(Electoral Integrity) ytitle(SWD)
graph export "winningSWD.jpg", replace

* Figure for H1b (losers): same specification and layout, loser outcome.
quietly regress Loser_weighted_mean integrity MeanEMBautonomy MeanEMBcapacity ///
    i.fed i.pres effpar_ele realgdpgr, vce(robust)
quietly margins, at(integrity=(60(10)100))
marginsplot, ///
    addplot(histogram integrity, percent yaxis(2) yscale(alt) fcolor(%25) ///
            ytitle(Percentage, axis(2))) ///
    title(Electoral Integrity on SWD (Losing) , size(medsmall)) ///
    legend(order(1 "EI")) ///
    xtitle(Electoral Integrity) ytitle(SWD)
graph export "losingSWD.jpg", replace

	
	  
clear all

******************************************************

** Additional test for H1b: winner and loser models on a common sample

* Copy the analysis data to Analysis3.dta and continue from that copy.
* NOTE(review): the copy is saved BEFORE the drops below, so Analysis3.dta on
* disk is identical to Analysis2.dta; the restricted sample exists only in
* memory. Confirm whether the save was meant to come after the drops.
use "Analysis2.dta"

save "Analysis3.dta", replace

use "Analysis3.dta"

* Remove the five country-elections that have no responses/observations from
* 'winners', so both models below run on the same cases.
drop if (countryname == "Canada"         & electionyear == 2008) | ///
        (countryname == "Czech Republic" & electionyear == 2017) | ///
        (countryname == "Greece"         & electionyear == 2012) | ///
        (countryname == "Iceland"        & electionyear == 2016) | ///
        (countryname == "Romania"        & electionyear == 2016)

* H1b on the restricted sample, no interaction terms.
* eststo is used as a prefix to run and store each model.

* Winners
eststo m8: regress Winner_weighted_mean integrity MeanEMBautonomy MeanEMBcapacity ///
    i.fed i.pres effpar_ele realgdpgr, vce(robust)

* Losers
eststo m9: regress Loser_weighted_mean integrity MeanEMBautonomy MeanEMBcapacity ///
    i.fed i.pres effpar_ele realgdpgr, vce(robust)

* Regression table for H1b on the restricted sample: winner model (m8) next
* to loser model (m9).
* Coefficients and standard errors with two decimals, adjusted R-squared,
* stars on the standard errors (* p<0.10, ** p<0.05, *** p<0.01).
* realgdpgr is labelled "Real GDP Growth" rather than "GDP": the regressor is
* a growth rate, so the shorter label misdescribed the coefficient.
esttab m8 m9, b(2) se(2) ar2 ///
    nobaselevels ///
    coeflabels(integrity       "Electoral Integrity"         ///
               MeanEMBautonomy "EMB Independence"            ///
               MeanEMBcapacity "EMB Capacity"                ///
               0.fed           "No Federalism"               ///
               1.fed           "Federalism"                  ///
               0.pres          "Parliamentary System"        ///
               1.pres          "Mixed System"                ///
               effpar_ele      "Effective Number of Parties" ///
               realgdpgr       "Real GDP Growth"             ///
               _cons           "Constant")                   ///
    star(* 0.10 ** 0.05 *** 0.01) staraux ///
    nodep mtitles("Winner" "Loser") ///
    onecell wide varwidth(32) modelwidth(18)

* Final figure for H1b: winner and loser predictions in a single plot.
* Each model is re-fitted quietly and its margins (integrity = 60, 70, ..., 100)
* are saved to a file; combomarginsplot then overlays the two saved results.
* combomarginsplot is a community-contributed command and must be installed.

quietly regress Winner_weighted_mean integrity MeanEMBautonomy MeanEMBcapacity ///
    i.fed i.pres effpar_ele realgdpgr, vce(robust)
quietly margins, at(integrity=(60(10)100)) saving(file100, replace)

quietly regress Loser_weighted_mean integrity MeanEMBautonomy MeanEMBcapacity ///
    i.fed i.pres effpar_ele realgdpgr, vce(robust)
quietly margins, at(integrity=(60(10)100)) saving(file101, replace)

combomarginsplot file100 file101, ///
    addplot(hist integrity, percent yaxis(2) yscale(alt) fcolor(%25) ///
            ytitle(Percentage, axis(2))) ///
    labels("Winner SWD" "Loser SWD") ///
    title("Electoral Integrity on SWD (Winners/Losers)") ///
    xtitle(Electoral Integrity) ytitle(SWD)
graph export "44OnlyWinnerLoser.jpg", replace


clear all

****************************************************** 

* Additional figures:

use "Analysis2.dta"

* Bar chart of average SWD by country.
* graph bar plots the mean of weighted_mean_SWD within each countryname
* category; (mean) is the default statistic and is spelled out for clarity.
graph bar (mean) weighted_mean_SWD, ///
    over(countryname, label(angle(45))) ///
    bar(1, bcolor(eltblue)) ///
    title("Average SWD by Country") ///
    ytitle("SWD")
graph export "AverageSWD.jpg", replace

* Mean / standard deviation of each variable (disabled; kept for reference)
*summarize weighted_mean_SWD weighted_mean_Cabinet weighted_mean_lrposition integrity MeanEMBcapacity MeanEMBautonomy fed pres dis_gall realgdpgr, detail

clear all

*******************************************************

* Additional tables: descriptive statistics

use "Analysis2.dta"

** Table 2 **
* Mean and standard deviation of the outcomes, predictors and controls,
* posted with estpost and written to RTF with esttab (estout package).
* NOTE(review): the same statistics are written to two files, table1.rtf and
* table3.rtf, under a "Table 2" heading - confirm which file is needed.
estpost summarize weighted_mean_SWD Winner_weighted_mean Loser_weighted_mean ///
    integrity MeanEMBautonomy MeanEMBcapacity ///
    fed pres effpar_ele realgdpgr
esttab using table1.rtf, cells("mean sd") replace

estpost summarize weighted_mean_SWD Winner_weighted_mean Loser_weighted_mean ///
    integrity MeanEMBautonomy MeanEMBcapacity ///
    fed pres effpar_ele realgdpgr
esttab using table3.rtf, cells("mean sd") replace

clear all


*******************************************************

 









